######################
#era5_raw
#paul stainier
#started 1/28/2020
#download the era5 weather data
#use instructions from https://cds.climate.copernicus.eu/how-to-api, using the R package reticulate to use python directly in R
#process it into .csv
#####################

library(sf)
library(ncdf4)
library(tidyverse)
library(reticulate)
library(lubridate)

rm(list = ls())
#########################
#toggle T/F for the part that 
#you want to run 
#########################
download_era5_temp <- T
download_era5_prec <- T
nc2csv_temp <- T
nc2csv_prec <- T

#########################
#replace with own directories before running
#########################
input_directory <- 
output_directory <- 



if(download_era5_temp){
  setwd(paste(input_directory, "temp", sep = "/"))
  py_install("cdsapi", pip=TRUE)
  cdsapi <- import('cdsapi')
  server = cdsapi$Client()

  for(year in c(2002:2025)){
    months = 1:12
    for(month in months){
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        query <- r_to_py(list(
          variable= "2m_temperature",
          product_type= "reanalysis",
          year= paste(year),
          month= paste(month), 
          day = paste(day),
          time= str_c(0:23,"00",sep=":")%>%str_pad(5,"left","0"),
          format= "netcdf",
          area = c(36, 67, 8, 98) #N/W/S/E
        )
        )
        
        
        server$retrieve("reanalysis-era5-single-levels",
                        query,
                        paste("era5_ta", year, "_", month, "_", day, ".nc", sep = "")
                        )
      }
    }
  }
}


if(download_era5_prec){
  setwd(paste(input_directory, "prec", sep = "/"))
  py_install("cdsapi", pip=TRUE)
  cdsapi <- import('cdsapi')
  server = cdsapi$Client()
  
  for(year in c(1979:2025)){
    for(month in 1:12){
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        query <- r_to_py(list(
          variable= "total_precipitation",
          product_type= "reanalysis",
          year= paste(year),
          month= paste(month), 
          day = paste(day),
          time= str_c(0:23,"00",sep=":")%>%str_pad(5,"left","0"),
          format= "netcdf",
          area = c(36, 67, 8, 98) #N/W/S/E
        )
        )
        
        
        server$retrieve("reanalysis-era5-single-levels",
                        query,
                        paste("era5_prec", year, "_", month, "_", day, ".nc", sep = "")
        )
      }
    }
  }
}

if(nc2csv_temp){
  for(year in c(2002:2024)){
    print(year)
    months = 1:12

    for(month in months){
      print(month)
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
      setwd(paste(input_directory, "temp", sep = "/"))
      today_data <- nc_open(paste("era5_ta", year, "_", month, "_", day, ".nc", sep = ""))
      #print(today_data)
      lon <- ncvar_get(today_data, "longitude")
      lat <- ncvar_get(today_data, "latitude")
      today_temp_matrix <- ncvar_get(today_data, "t2m")
      if(year < 2016){
        t<- ncvar_get(today_data, "time")
        tunits<-ncatt_get(today_data,'time')
      }else{
        t<- ncvar_get(today_data, "valid_time")
        tunits<-ncatt_get(today_data,'valid_time')
      }
      
      if(year < 2016){
        tustr<- strsplit(tunits$units, " ")
        timestamp = as.POSIXct(t*3600,tz='GMT',origin=tustr[[1]][3])
        hour_numbers <- as.numeric(substr(timestamp, 12, 13))
        if(hour_numbers[5] != 4 | sum(hour_numbers, na.rm = T) != 276){
          break 
        }
      }
      
      
      yesterday_year <- year(ymd(year*10000+month*100+day) - 1)
      yesterday_month <- month(ymd(year*10000+month*100+day) - 1)
      yesterday_day <- day(ymd(year*10000+month*100+day) - 1)
      yesterday_data <- nc_open(paste("era5_ta", yesterday_year, "_", yesterday_month, "_", yesterday_day, ".nc", sep = ""))
      #print(yesterday_data)
      yesterday_temp_matrix <- ncvar_get(yesterday_data, "t2m")
      if(yesterday_year < 2016){
        t<- ncvar_get(yesterday_data, "time")
        tunits<-ncatt_get(yesterday_data,'time')
      }else{
        t<- ncvar_get(yesterday_data, "valid_time")
        tunits<-ncatt_get(yesterday_data,'valid_time')
      }
      if(year < 2016){
        tustr<- strsplit(tunits$units, " ")
        timestamp = as.POSIXct(t*3600,tz='GMT',origin=tustr[[1]][3])
        hour_numbers <- as.numeric(substr(timestamp, 12, 13))
        if(hour_numbers[5] != 4 | sum(hour_numbers, na.rm = T) != 276){
          break 
        }
      }
      
      #era5 time is utc, and india is 5.5 hours ahead of that
      #so the last 5 hours of "yesterday's" era5 are part of india's today
      max_matrix <- pmax(yesterday_temp_matrix[, , 19], yesterday_temp_matrix[, , 20], yesterday_temp_matrix[, , 21],
                         yesterday_temp_matrix[, , 22], yesterday_temp_matrix[, , 23], yesterday_temp_matrix[, , 24],
                         today_temp_matrix[, , 1], today_temp_matrix[, , 2], today_temp_matrix[, , 3],
                         today_temp_matrix[, , 4], today_temp_matrix[, , 5], today_temp_matrix[, , 6],
                         today_temp_matrix[, , 7], today_temp_matrix[, , 8], today_temp_matrix[, , 9],
                         today_temp_matrix[, , 10], today_temp_matrix[, , 11], today_temp_matrix[, , 12],
                         today_temp_matrix[, , 13], today_temp_matrix[, , 14], today_temp_matrix[, , 15],
                         today_temp_matrix[, , 16], today_temp_matrix[, , 17], today_temp_matrix[, , 18])
      
      max_vector <- as.data.frame(as.vector(max_matrix))
      max_vector$year <- year
      max_vector$month <- month
      max_vector$day <- day
      max_vector$lon <- 0
      max_vector$lat <- 0
      index = 1
      for(i in 1:113){
        for(j in 1:125){
          max_vector$lat[index] <- lat[i] 
          max_vector$lon[index] <- lon[j] 
          index = index + 1
        }
      }
      
      colnames(max_vector)[1] <- "tmax2m"
      max_vector$tmax2m <- max_vector$tmax2m*9/5 - 459.67
      setwd(paste(output_directory, "temp", sep = "/"))
      write.csv(max_vector, paste("tmax2m_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
      }
    }
  }
}

if(nc2csv_prec){
  for(year in c(1979:2024)){
    print(year)
    months = 1:12
    for(month in months){
      print(month)
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        if(year == 1979 & month == 1 & day == 1){
          setwd(paste(input_directory, "prec", sep = "/"))
          today_data <- nc_open(paste("era5_prec", year, "_", month, "_", day, ".nc", sep = ""))
          #print(today_data)
          lon <- ncvar_get(today_data, "longitude")
          lat <- ncvar_get(today_data, "latitude")
          today_prec_matrix <- ncvar_get(today_data, "tp")
          #climate data store changed from "time" to "valid_time" 
          #between when i downloaded the data up through 2015 and 
          #when i downloaded it for 2016-2023
          if(year < 2016){
            t<- ncvar_get(today_data, "time")
            tunits<-ncatt_get(today_data,'time')
          }else{
            t<- ncvar_get(today_data, "valid_time")
            tunits<-ncatt_get(today_data,'valid_time')
          }
          if(year < 2016){
            tustr<- strsplit(tunits$units, " ")
            timestamp = as.POSIXct(t*3600,tz='GMT',origin=tustr[[1]][3])
            hour_numbers <- as.numeric(substr(timestamp, 12, 13))
            if(hour_numbers[5] != 4 | sum(hour_numbers, na.rm = T) != 276){
              break 
            }
          }
          
          sum_matrix <-  today_prec_matrix[, , 1]+ today_prec_matrix[, , 2]+ today_prec_matrix[, , 3]+
            today_prec_matrix[, , 4]+ today_prec_matrix[, , 5]+ today_prec_matrix[, , 6]+
            today_prec_matrix[, , 7]+ today_prec_matrix[, , 8]+ today_prec_matrix[, , 9]+
            today_prec_matrix[, , 10]+ today_prec_matrix[, , 11]+ today_prec_matrix[, , 12]+
            today_prec_matrix[, , 13]+ today_prec_matrix[, , 14]+ today_prec_matrix[, , 15]+
            today_prec_matrix[, , 16]+ today_prec_matrix[, , 17]+ today_prec_matrix[, , 18]
          
          
          
          
          sum_vector <- as.data.frame(as.vector(sum_matrix))
          colnames(sum_vector)[1] <- "prec"
          sum_vector$prec <- ifelse(sum_vector$prec > 1e-17, sum_vector$prec,0 )
          sum_vector$year <- year
          sum_vector$month <- month
          sum_vector$day <- day
          sum_vector$lon <- 0
          sum_vector$lat <- 0
          index = 1
          for(i in 1:113){
            for(j in 1:125){
              sum_vector$lat[index] <- lat[i] 
              sum_vector$lon[index] <- lon[j] 
              index = index + 1
            }
          }
          
          sum_vector$prec <- sum_vector$prec*1000 #meters to milimeters
          setwd(paste(output_directory, "prec", sep = "/"))
          write.csv(sum_vector, paste("totalprec_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        }
        else{
          setwd(paste(input_directory, "prec", sep = "/"))
          today_data <- nc_open(paste("era5_prec", year, "_", month, "_", day, ".nc", sep = ""))
          #print(today_data)
          lon <- ncvar_get(today_data, "longitude")
          lat <- ncvar_get(today_data, "latitude")
          today_prec_matrix <- ncvar_get(today_data, "tp")
          if(year < 2016){
            t<- ncvar_get(today_data, "time")
            tunits<-ncatt_get(today_data,'time')
          }else{
            t<- ncvar_get(today_data, "valid_time")
            tunits<-ncatt_get(today_data,'valid_time')
          }
          tustr<- strsplit(tunits$units, " ")
          if(year < 2016){
            tustr<- strsplit(tunits$units, " ")
            timestamp = as.POSIXct(t*3600,tz='GMT',origin=tustr[[1]][3])
            hour_numbers <- as.numeric(substr(timestamp, 12, 13))
            if(hour_numbers[5] != 4 | sum(hour_numbers, na.rm = T) != 276){
              break 
            }
          }
          
          yesterday_year <- year(ymd(year*10000+month*100+day) - 1)
          yesterday_month <- month(ymd(year*10000+month*100+day) - 1)
          yesterday_day <- day(ymd(year*10000+month*100+day) - 1)
          yesterday_data <- nc_open(paste("era5_prec", yesterday_year, "_", yesterday_month, "_", yesterday_day, ".nc", sep = ""))
          #print(yesterday_data)
          yesterday_prec_matrix <- ncvar_get(yesterday_data, "tp")
          if(yesterday_year < 2016){
            t<- ncvar_get(yesterday_data, "time")
            tunits<-ncatt_get(yesterday_data,'time')
          }else{
            t<- ncvar_get(yesterday_data, "valid_time")
            tunits<-ncatt_get(yesterday_data,'valid_time')
          }
          if(year < 2016){
            tustr<- strsplit(tunits$units, " ")
            timestamp = as.POSIXct(t*3600,tz='GMT',origin=tustr[[1]][3])
            hour_numbers <- as.numeric(substr(timestamp, 12, 13))
            if(hour_numbers[5] != 4 | sum(hour_numbers, na.rm = T) != 276){
              break 
            }
          }
          
          sum_matrix <- yesterday_prec_matrix[, , 19]+ yesterday_prec_matrix[, , 20]+ yesterday_prec_matrix[, , 21]+
            yesterday_prec_matrix[, , 22]+ yesterday_prec_matrix[, , 23]+ yesterday_prec_matrix[, , 24]+
            today_prec_matrix[, , 1]+ today_prec_matrix[, , 2]+ today_prec_matrix[, , 3]+
            today_prec_matrix[, , 4]+ today_prec_matrix[, , 5]+ today_prec_matrix[, , 6]+
            today_prec_matrix[, , 7]+ today_prec_matrix[, , 8]+ today_prec_matrix[, , 9]+
            today_prec_matrix[, , 10]+ today_prec_matrix[, , 11]+ today_prec_matrix[, , 12]+
            today_prec_matrix[, , 13]+ today_prec_matrix[, , 14]+ today_prec_matrix[, , 15]+
            today_prec_matrix[, , 16]+ today_prec_matrix[, , 17]+ today_prec_matrix[, , 18]
          
          
          
          
          sum_vector <- as.data.frame(as.vector(sum_matrix))
          colnames(sum_vector)[1] <- "prec"
          sum_vector$prec <- ifelse(sum_vector$prec > 1e-17, sum_vector$prec,0 )
          sum_vector$year <- year
          sum_vector$month <- month
          sum_vector$day <- day
          sum_vector$lon <- 0
          sum_vector$lat <- 0
          index = 1
          for(i in 1:113){
            for(j in 1:125){
              sum_vector$lat[index] <- lat[i] 
              sum_vector$lon[index] <- lon[j] 
              index = index + 1
            }
          }
          
          sum_vector$prec <- sum_vector$prec*1000 #meters to milimeters
          setwd(paste(output_directory, "prec", sep = "/"))
          write.csv(sum_vector, paste("totalprec_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        }
      }
    }
  }
}
